.libPaths(c("/data/rajewsky/home/skim/R/usr_lib_Seurat/", "/data/rajewsky/home/skim/R/usr_lib_v4/"))
#.libPaths()
unloadNamespace("mgcv")
unloadNamespace("Matrix")


library(ggplot2)
library(gridExtra)
library(ggrepel)
library(dplyr)
library(cowplot)
library(paletteer)
library(stringr)
library(scales)
library(readxl)
library(randomForest)
library(GGally)
library(igraph)
library(paletteer)
library(stringr)
library(scales)
library(tidyr)
library(readxl)
library(ggraph)
# miR-7 target genes 
mir7target.df <- read.table("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/ext_files/TargetScan7.2__miR-7-5p.predicted_targets_lists_without_header.txt")
# miR-7 targe genes from mirdb
mir7mirdb.df <- read.csv("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/ext_files/20220423_mirdb_mir_7_extract.csv", header = TRUE)

# load pheno genes
cledi_pheno_genes <- read_excel("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/ext_files/biased_list_of_phenotype_related_genes.xlsx", col_names = FALSE)

# TF genes
TF_list <- read.csv("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/ext_files/Mouse_TFs_list.txt", stringsAsFactors = FALSE)
TF_list[725,1] <- "Auts2"
colnames(TF_list) <- "TF"

# load pheno genes
cledi_pheno_genes <- read_excel("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/ext_files/biased_list_of_phenotype_related_genes.xlsx", col_names = FALSE)

#########################
# DE data load
#########################
res1.df <- readRDS(file = "/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Results/DESeq2_WTN_batch_corrected_res1.rda")
res2.df <- readRDS(file = "/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Results/DESeq2_WTN_nested_corrected_res2.rda")
res3.df <- readRDS(file = "/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Results/DESeq2_WTN_nested_corrected_res3.rda")
res4.df <- readRDS(file = "/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Results/DESeq2_WTN_batch_corrected_res4.rda")

# up-genes
set1 <- res1.df[which(res1.df$log2FoldChange > 0.5 &  res1.df$padj < 0.05), ]$gene
set2 <- res2.df[which(res2.df$log2FoldChange > 0.5 &  res2.df$padj < 0.05), ]$gene
set3 <- res3.df[which(res3.df$log2FoldChange > 0.5 &  res3.df$padj < 0.05), ]$gene
set4 <- res4.df[which(res4.df$log2FoldChange > 0.5 &  res4.df$padj < 0.05), ]$gene 

genes1 <- c(set1, set2, set3, set4) %>% unique()

# down-genes
set5 <- res1.df[which(res1.df$log2FoldChange < -0.5 &  res1.df$padj < 0.05), ]$gene
set6 <- res2.df[which(res2.df$log2FoldChange < -0.5 &  res2.df$padj < 0.05), ]$gene
set7 <- res3.df[which(res3.df$log2FoldChange < -0.5 &  res3.df$padj < 0.05), ]$gene
set8 <- res4.df[which(res4.df$log2FoldChange < -0.5 &  res4.df$padj < 0.05), ]$gene 

genes2 <- c(set5, set6, set7, set8) %>% unique()


# DE genes
DE_genes <- c(genes1, genes2)

# miR-7 target genes (in both DB)
mir7_genes <- intersect(mir7target.df$V1, mir7mirdb.df$X4)

# pheno type (17 genes)
# genes in DE
pheno_genes <- cledi_pheno_genes$...1[cledi_pheno_genes$...1 %in% DE_genes]
pheno_genes <- pheno_genes[!pheno_genes %in% mir7_genes]
low_count <- data.frame()

for(i in c(1:10)){
  final_low <- readRDS(paste0("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Results/GRN_RF/low/seed_", i,"/low_layer.rda"))
  final_low$seed <- i
  low_count <- rbind(low_count, final_low)

  
}

low_count2 <- low_count %>% dplyr::group_by(target, regulator)%>%
  dplyr::summarise(count = n())


high_count <- data.frame()

for(i in c(1:10)){
  final_high <- readRDS(paste0("/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Results/GRN_RF/high/seed_", i,"/high_layer.rda"))
  final_high$seed <- i
  high_count <- rbind(high_count, final_high)

  
}

high_count2 <- high_count %>% dplyr::group_by(target, regulator)%>%
  dplyr::summarise(count = n())
disconnetcted_genes <- setdiff(low_count2$regulator, high_count2$target)
low_count2 <- low_count2 %>% filter(!regulator %in% disconnetcted_genes)
input <- rbind(low_count2, high_count2)
input <- input[, c("regulator", "target" , "count")]
colnames(input) <- c("regulatoryGene", "targetGene", "weight")

input <- input[input$weight > 1, ]

input$weight <- input$weight / 10



Gsi <- graph.data.frame(input, directed = T)
Asi <- get.adjacency(Gsi,sparse = F,attr = "weight",type = "both")

g_arasi <- graph.adjacency(Asi,mode = "directed",weighted = T)

miR-7 regulator genes number of edges

table(high_count2[high_count2$count > 1, ]$regulator)
## 
##    Abi2   Aplp2    Car7  Dlgap1  Fbxo28   Fndc4  Gabra1 Gal3st3    Ggt7     Ide 
##       6       2       5       5       5       3       2       4       2      11 
##  Iglon5  Kif13a    Ltn1  Nucks1    Oxr1   Parp1   Pde4a    Ppif   Prkcb   Psme3 
##       3       4       2       1       2       3       7       7       1       1 
##  Ptgfrn    Raf1  Rgs7bp   Scn2b Slc38a2  Slc5a3  Smim12  Srgap2   Vdac1  Zbtb22 
##       4       3       1       4       3       3       2       1       4       1

miR-7 target middle genes number of edges (above)

table(high_count2[high_count2$count > 1, ]$target)
## 
## 2810468N07Rik          Abl2         Ackr1        B3gnt7          Ccnf 
##             3             2             2             3             1 
##          Cnr1       Dmrtc1a           Dtl        Elovl4         Epha5 
##             2             1             2             1             2 
##       Gm10419       Gm10605        Grin2a         Grtp1         Igf2r 
##             3             2             1             3             1 
##        Igsf9b        Kcnab2         Llgl2          Mcub         Mmp19 
##             2             4             2             3             1 
##        Mrps25           Pgr          Plat          Ptk7        Rab5if 
##             3             4             3             1             2 
##        Ralgds         Rgs13        Sh3rf3      Slc25a25        Tbc1d1 
##             1             2             2             4             2 
##       Tmem231        Tmem51         Tmub2        Tom1l2         Tor1b 
##             1             3             2             2             3 
##          Tpm2         Trim2       Txndc12         Usp31        Zdhhc3 
##             5             3             1            14             2 
##       mt-Cytb 
##             1

miR-7 target middle genes number of edges (below)

table(low_count2[low_count2$count > 1, ]$regulator)
## 
## 2810468N07Rik         Abcg4          Abl2         Ackr1           Ank 
##             3             2             1             3             2 
##        B3gnt7          Ccnf          Cnr1          Dlg5          Dlk1 
##             1             2             2             2             1 
##       Dmrtc1a           Dtl         Epha5        Fbxo10          Gad2 
##             1             2             2             2             2 
##       Gm10419       Gm10605       Gm16536        Grin2a         Grtp1 
##             3             1             3             2             1 
##         Igf2r        Igsf9b         Ipo11        Kcnab2        Kcnma1 
##             2             2             2             4             2 
##       Lamtor3         Llgl2          Mcub         Mmp19        Mrps25 
##             2             2             2             2             2 
##          Nab1           Ocm        Papss1           Pgr          Plat 
##             2             2             2             2             2 
##          Ptk7        Rab5if        Ralgds         Rgs13        Sh3rf3 
##             2             3             2             2             3 
##      Slc25a25         Smad3         Sulf2        Tbc1d1       Tmem231 
##             2             2             2             2             2 
##        Tmem51         Tmub2        Tom1l2         Tor1b          Tpm2 
##             3             2             2             2             3 
##        Trank1         Trim2       Tspan15       Txndc12         Uhmk1 
##             2             2             2             3             1 
##         Usp31        Zdhhc3        Zfp317        Zfp871       mt-Cytb 
##             3             2             2             2             3

phenotype genes number of edges

table(low_count2[low_count2$count > 1, ]$target)
## 
##   Cplx1   Prkcd  Sec24d Sec61a1  Snap91    Sncb    Stx2  Stxbp6    Syt1    Syt4 
##      11       6       1      11      18      19       1       7       1      13 
##    Syt6   Unc5a   Unc5b   Vamp3   Vamp8 
##       6       8       7      16       1
df <- names(V(g_arasi)) %>% as.data.frame()
colnames(df) <- "name"
set.seed(2)
df$y_coord <- ifelse(df$name %in% pheno_genes, runif(sum(df$name %in% pheno_genes, na.rm = TRUE), min = -1.1 , max = -0.9),
                   ifelse(df$name %in% mir7_genes, runif(sum(df$name %in% mir7_genes, na.rm = TRUE), min = 0.8 , max = 1.2), 
                          runif(sum(!df$name %in% c(pheno_genes, mir7_genes), na.rm = TRUE), min = -0.4 , max = 0.4)))

df$x_coord <- 0 
set.seed(2)
df$x_coord <- runif(nrow(df), min = -1 , max = 1)
ggraph(g_arasi, layout = "manual", x = df[,c("x_coord")],y = df[,"y_coord"]) +
  geom_node_point( shape = 21, size = 15) +
  geom_node_text(aes(label = df$name),
                 family = "serif", repel = FALSE, size = 3.5) +
  geom_edge_link(arrow = arrow(length = unit(4, 'mm')), aes(width = weight), 
                   end_cap = circle(3, 'mm'), edge_color = "black", edge_alpha = 0.1) + 
  scale_edge_width(range = c(0.5, 2.5)) + 
  #geom_edge_link(aes(edge_colour = factor(input$targetGene))) +  
  #geom_edge_link0(edge_colour = "black",edge_width = 0.1, edge_alpha = 0.5) + 
  #scale_fill_manual(values=c("grey66", "#EEB422","#424242"))+
  #scale_size(range=c(2,5),guide = FALSE)+
  theme_graph()+
  theme(legend.position = "bottom")

protein_link_sub_1st <- readRDS(file = "/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/ext_files/stringdb_data_extract.rda")
protein_link_sub_1st$link <- paste0(protein_link_sub_1st$gene1, "_", protein_link_sub_1st$gene2)

input$link <- paste0(input$regulatoryGene, "_", input$targetGene)
input$link_check <- ifelse(input$link %in% protein_link_sub_1st$link, TRUE, FALSE)


compg.edges <- as.data.frame(get.edgelist(g_arasi))
compg.edges$link <- paste0(compg.edges$V1, "_", compg.edges$V2)
compg.edges$link_check <- ifelse(compg.edges$link %in% protein_link_sub_1st$link, TRUE, FALSE)

compg.nodes = as_data_frame(g_arasi, "vertices")

final figure

g1 <- ggraph(g_arasi, layout = "manual", x = df[,c("x_coord")],y = df[,"y_coord"]) +
  geom_node_point(fill = as.factor(df$color), shape = 21, size = 14)+
  geom_node_text(aes(label = df$name),
                 family = "serif", repel = FALSE, size = 3.5) +
 geom_edge_link(arrow = arrow(length = unit(4, 'mm')), 
                start_cap = circle(5, 'mm'),
                   end_cap = circle(5, 'mm'), aes(edge_color = factor(compg.edges$link_check), width = weight), edge_alpha = 0.2) + 
  scale_edge_width(range = c(0.5, 3)) + 
  #  geom_edge_link(arrow = arrow(length = unit(4, 'mm')), 
  #                  end_cap = circle(5, 'mm'), aes(edge_color = factor(compg.edges$link_check)), edge_alpha = 0.2) + 
  # #geom_edge_link(aes(edge_colour = factor(input$link_check))) + 
  #geom_edge_link0(edge_colour = "black",edge_width = 0.1, edge_alpha = 0.5) + 
  scale_edge_colour_manual(values = c("grey","#660000")) + 
  #scale_color_manual(values=c("red","green"))+
  #scale_size(range=c(2,5),guide = FALSE)+
  theme_graph()+
  theme(legend.position = "bottom", legend.title=element_blank()) 

g1

# ggsave(filename = "/data/rajewsky/projects/cdr1as_ko_snRNA/codes_github/cdr1as/Figures/GRN_RF.pdf",
#        plot = g1,
#        scale = 1, width = 10, height = 8, units = "in", device = cairo_pdf,
#        dpi = 300)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.3.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C         LC_TIME=C           
##  [4] LC_COLLATE=C         LC_MONETARY=C        LC_MESSAGES=C       
##  [7] LC_PAPER=C           LC_NAME=C            LC_ADDRESS=C        
## [10] LC_TELEPHONE=C       LC_MEASUREMENT=C     LC_IDENTIFICATION=C 
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggraph_2.0.5        tidyr_1.1.3         igraph_1.2.6       
##  [4] GGally_2.1.2        randomForest_4.6-14 readxl_1.3.1       
##  [7] scales_1.1.1        stringr_1.4.0       paletteer_1.4.0    
## [10] cowplot_1.1.1       dplyr_1.0.7         ggrepel_0.9.1      
## [13] gridExtra_2.3       ggplot2_3.3.5      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.7         assertthat_0.2.1   digest_0.6.27      utf8_1.2.2        
##  [5] ggforce_0.3.3      R6_2.5.1           cellranger_1.1.0   plyr_1.8.6        
##  [9] evaluate_0.14      highr_0.9          pillar_1.6.2       rlang_0.4.11      
## [13] jquerylib_0.1.4    DT_0.19            rmarkdown_2.11     labeling_0.4.2    
## [17] htmlwidgets_1.5.4  polyclip_1.10-0    munsell_0.5.0      compiler_4.0.4    
## [21] xfun_0.26          pkgconfig_2.0.3    htmltools_0.5.2    tidyselect_1.1.1  
## [25] tibble_3.1.4       graphlayouts_0.7.1 reshape_0.8.8      fansi_0.5.0       
## [29] viridisLite_0.4.0  crayon_1.4.1       withr_2.4.2        prismatic_1.1.0   
## [33] MASS_7.3-54        grid_4.0.4         jsonlite_1.7.2     gtable_0.3.0      
## [37] lifecycle_1.0.0    DBI_1.1.1          magrittr_2.0.1     stringi_1.7.4     
## [41] farver_2.1.0       viridis_0.6.1      bslib_0.3.0        ellipsis_0.3.2    
## [45] generics_0.1.0     vctrs_0.3.8        RColorBrewer_1.1-2 rematch2_2.1.2    
## [49] tools_4.0.4        glue_1.4.2         tweenr_1.0.2       purrr_0.3.4       
## [53] crosstalk_1.1.1    fastmap_1.1.0      yaml_2.2.1         colorspace_2.0-2  
## [57] tidygraph_1.2.0    knitr_1.34         sass_0.4.0